1 data summary

# Column-wise overview of the match table: names, types and leading values.
glimpse(Laliga_2022)
## Rows: 7
## Columns: 18
## $ league        <chr> "La_liga", "La_liga", "La_liga", "La_liga", "La_liga", "…
## $ season        <chr> "2022/2023", "2022/2023", "2022/2023", "2022/2023", "202…
## $ match_id      <chr> "18962", "18963", "18964", "18965", "18966", "18967", "1…
## $ isResult      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
## $ home_id       <chr> "157", "152", "231", "148", "261", "146", "208"
## $ home_team     <chr> "Osasuna", "Celta Vigo", "Real Valladolid", "Barcelona",…
## $ home_abbr     <chr> "OSA", "CEL", "VAL", "BAR", "CAD", "VAL", "ALM"
## $ away_id       <chr> "138", "141", "154", "145", "140", "223", "150"
## $ away_team     <chr> "Sevilla", "Espanyol", "Villarreal", "Rayo Vallecano", "…
## $ away_abbr     <chr> "SEV", "ESP", "VIL", "RVL", "SOC", "GIR", "RMA"
## $ home_goals    <dbl> 2, 2, 0, 0, 0, 1, 1
## $ away_goals    <dbl> 1, 2, 3, 0, 1, 0, 2
## $ home_xG       <dbl> 1.355710, 0.347363, 0.859075, 1.371290, 0.294358, 1.8787…
## $ away_xG       <dbl> 0.965574, 1.033400, 1.395090, 0.771530, 2.643670, 0.1321…
## $ datetime      <chr> "2022-08-12 19:00:00", "2022-08-13 15:00:00", "2022-08-1…
## $ forecast_win  <dbl> 0.4650, 0.0835, 0.2000, 0.5058, 0.0136, 0.8897, 0.0424
## $ forecast_draw <dbl> 0.3237, 0.3037, 0.2992, 0.2936, 0.0672, 0.1014, 0.1000
## $ forecast_loss <dbl> 0.2113, 0.6128, 0.5008, 0.2006, 0.9192, 0.0089, 0.8576
# Print the first six matches.
head(Laliga_2022, n = 6)
## # A tibble: 6 × 18
##   league  season match_id isResult home_id home_team home_abbr away_id away_team
##   <chr>   <chr>  <chr>    <lgl>    <chr>   <chr>     <chr>     <chr>   <chr>    
## 1 La_liga 2022/… 18962    TRUE     157     Osasuna   OSA       138     Sevilla  
## 2 La_liga 2022/… 18963    TRUE     152     Celta Vi… CEL       141     Espanyol 
## 3 La_liga 2022/… 18964    TRUE     231     Real Val… VAL       154     Villarre…
## 4 La_liga 2022/… 18965    TRUE     148     Barcelona BAR       145     Rayo Val…
## 5 La_liga 2022/… 18966    TRUE     261     Cadiz     CAD       140     Real Soc…
## 6 La_liga 2022/… 18967    TRUE     146     Valencia  VAL       223     Girona   
## # … with 9 more variables: away_abbr <chr>, home_goals <dbl>, away_goals <dbl>,
## #   home_xG <dbl>, away_xG <dbl>, datetime <chr>, forecast_win <dbl>,
## #   forecast_draw <dbl>, forecast_loss <dbl>
# Reshape from one row per match to one row per team per match, then attach
# the points earned and the for/against goal and xG figures for that team.
Laliga_2022 <- Laliga_2022 |>
  pivot_longer(
    cols = c(home_team, away_team),
    names_to = "home_away",
    values_to = "team"
  ) |>
  mutate(
    # Match outcome, labelled with the side that won (or "draw").
    win = if_else(
      home_goals == away_goals,
      "draw",
      if_else(home_goals > away_goals, "home_team", "away_team")
    ),
    # 1 point for a draw, 3 when this row's side is the winner, otherwise 0.
    point = if_else(
      win == "draw",
      1,
      if_else(home_away == win, 3, 0)
    ),
    # Goals and xG seen from this row's team: own values first, then the
    # opponent's values as the "against" columns.
    G = if_else(home_away == "away_team", away_goals, home_goals),
    GA = if_else(home_away == "away_team", home_goals, away_goals),
    xG = if_else(home_away == "away_team", away_xG, home_xG),
    xGA = if_else(home_away == "away_team", home_xG, away_xG)
  )

1.1 ranking

# League table: per-team totals of points, goals and xG, best team first.
Ranking <- Laliga_2022 |>
  group_by(team) |>
  summarise(across(c(point, G, xG), sum)) |>
  rename(points = point) |>
  arrange(desc(points))
# Interactive league table with export buttons.
DT::datatable(
  Ranking,
  extensions = "Buttons",
  options = list(
    # The Buttons extension only renders where "B" appears in `dom`; without
    # it the buttons configured below are never shown. "lfrtip" is the
    # DataTables default layout, so the standard controls are kept.
    dom = "Blfrtip",
    pageLength = 20,
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
# Names of the ten teams at the top of the ranking table.
top_10 <- Ranking |>
  slice_head(n = 10) |>
  pull(team)


# Ranking trend: cumulative points over time for the top 10 teams.
g <- Laliga_2022 |>
  select(team, datetime, point) |>
  filter(team %in% top_10) |>
  # datetime is stored as "YYYY-MM-DD HH:MM:SS" text; parse it so the x axis
  # is a continuous time scale instead of one discrete level per kickoff.
  # NOTE(review): the source timezone is not visible here; UTC is used only
  # to make parsing deterministic -- confirm if absolute times matter.
  mutate(datetime = as.POSIXct(datetime, tz = "UTC")) |>
  # cumsum() depends on row order, so sort chronologically first.
  arrange(datetime) |>
  group_by(team) |>
  mutate(points = cumsum(point)) |>
  ungroup() |>
  # group = team draws one line per team; group = 1 would join every team's
  # points into a single zig-zag path.
  ggplot(aes(datetime, points, group = team, color = team)) +
  geom_line()
plotly::ggplotly(g)

2 得点、期待値分析

2.1 xG_xGA

ゴール期待値(xG)と被ゴール期待値(xGA)の分布

# xG_xGA: per-team mean expected goals for (xG) against mean expected goals
# conceded (xGA), one labelled point per team.
Laliga_2022 |>
  group_by(team) |>
  summarise(
    xG = mean(xG),
    xGA = mean(xGA)
  ) |>
  ggplot(aes(xG, xGA, color = team, label = team)) +
  geom_point() +
  ggrepel::geom_label_repel() +
  # xlim()/ylim() discard observations outside the limits, which silently
  # removed teams from the plot. expand_limits() still guarantees the 0-2.5
  # window on both axes but lets the scales grow to include every team.
  expand_limits(x = c(0, 2.5), y = c(0, 2.5))
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_label_repel).

2.2 G_xG

実際のゴール数とゴール期待値の分布(累計)

# G_xG: cumulative actual goals against cumulative expected goals per team.
Laliga_2022 |>
  group_by(team) |>
  summarise(
    # Both axes must use the same aggregate for the y = x reference line to
    # be meaningful; goals were previously averaged while xG was summed.
    G = sum(G),
    xG = sum(xG)
  ) |>
  ggplot(aes(xG, G, color = team, label = team)) +
  geom_point() +
  # Reference line G = xG: teams above it scored more than their xG total.
  geom_abline(slope = 1) +
  ggrepel::geom_label_repel()